library(runjags)
library(MASS)
library(parallel)

###Load data  
load("gatudata.RData")

##########################
###########Table 1########

##########Proportion fluent in Russian
1-mean(gatudata$rf,na.rm=T)
1-mean(gatudata$rf[gatudata$Gagauz==1],na.rm=T)
1-mean(gatudata$rf[gatudata$Moldovan==1],na.rm=T)
1-mean(gatudata$rf[gatudata$Russian==1],na.rm=T)

##########Proportion fluent in Moldovan
mean(gatudata$mf,na.rm=T)
mean(gatudata$mf[gatudata$Gagauz==1],na.rm=T)
mean(gatudata$mf[gatudata$Moldovan==1],na.rm=T)
mean(gatudata$mf[gatudata$Russian==1],na.rm=T)

##########Proportion fluent in Gagauz
mean(gatudata$gf,na.rm=T)
mean(gatudata$gf[gatudata$Gagauz==1],na.rm=T)
mean(gatudata$gf[gatudata$Moldovan==1],na.rm=T)
mean(gatudata$gf[gatudata$Russian==1],na.rm=T)

################Sums of different groups
nrow(gatudata)
sum(gatudata$Gagauz,na.rm=T)
sum(gatudata$Moldovan,na.rm=T)
sum(gatudata$Russian,na.rm=T)

###########################################################
###########################################################
#Main experimental and observational analyses; analyses of#
#NAs#######################################################
###Figures 3 and 5, Appendices I, J, K##################### 
###########################################################

####Experimental balance checks
summary(lm(tg=="Control" ~ rf + mf + gf + NGagauz + Russian + Ukrainian +  
             Bulgarian + Other + income + age + male + highered + urban, data=gatudata))
summary(lm(tg=="T1" ~ rf + mf + gf + NGagauz + Russian + Ukrainian +  Bulgarian + Other + 
             income + age + male + highered + urban, data=gatudata))
summary(lm(tg=="T2" ~ rf + mf + gf + NGagauz + Russian + Ukrainian +  Bulgarian + Other + 
             income + age + male + highered + urban, data=gatudata))
summary(lm(tg=="T3" ~ rf + mf + gf + NGagauz + Russian + Ukrainian +  Bulgarian + Other + 
             income + age + male + highered + urban, data=gatudata))
####No consistent evidence of balance problems

####Threshold initial values for experimental outcome
taustar<-polr(method="probit", as.factor(DV1) ~ rf + mf + gf + NGagauz + Russian + Ukrainian + 
                Bulgarian + Other + income + age + male + highered + urban, data=gatudata)$zeta
  
####Threshold initial values for observational outcomes (take average over threshold estimates)  
gammastar<-matrix(nrow=7,ncol=4)
for(i in 1:7){
  gammastar[i,]<-polr(method="probit", as.factor(gatudata[,i+25]) ~ gatudata$rf + gatudata$mf +
                        gatudata$gf + gatudata$NGagauz + gatudata$Russian + gatudata$Ukrainian + 
                        gatudata$Bulgarian + gatudata$Other + gatudata$income + gatudata$age + 
                        gatudata$male + gatudata$highered + gatudata$urban)$zeta
}
gammastar<-apply(gammastar,2,mean)

##########Model data  
hmod<-list(N=nrow(gatudata), tg=gatudata$tg, 
           y1=gatudata$DV1, y2=as.matrix(gatudata[,26:32]),
           y1NA=gatudata$DV1na, y2NA=as.matrix(gatudata[,33:39]), 
           g0=rep(0,4), G0=diag(.01,4),
           t0=rep(0,3), T0=diag(.01,3),
           rf=gatudata$rf, mf=gatudata$mf, gf=gatudata$gf, NGagauz=gatudata$NGagauz, 
           Other=gatudata$Other, Ukrainian=gatudata$Ukrainian,
           Russian=gatudata$Russian, Bulgarian=gatudata$Bulgarian, 
           Male=gatudata$male, 
           Age=gatudata$age, Urban=gatudata$urban,
           NIOTA=14, I0=diag(1,14), i0=rep(0,14),
           NALPHA=9, A0=diag(1,9), a0=rep(0,9), 
           B0=diag(1,5), b0=rep(0,5),
           HigherEd=gatudata$highered, Married=gatudata$married, Locality=gatudata$locality, 
           Prof=gatudata$prof, Resp=gatudata$resp, House=gatudata$house, ymin=gatudata$ymin,
           ymax=gatudata$ymax, 
           bI0=rep(0,5), BI0=diag(1,5), ae1=rep(1,14))
  
##################Model
##  hmodel <- run.jags(method="parallel",model="gF2.txt", inits=list(gammastar=gammastar,
##                                                 taustar=taustar), 
##                     monitor=c("alpha", "beta", "tau", "mu.a", "iota", "gamma",
##                               "alphaNA", "betaNA", "mu.aNA", "iotaNA", "pr", "pm",  
##                               "pg", "bI", "betaI","gammaI","alphaI", "pIe", "pIm","e1", 
##                               "rIl","rIh", "Income"), 
##                     data=hmod, n.chains=4, adapt=5000, burnin=50000, sample=1000, thin=500, 
##                     summarise=F,plots=F, modules=c("glm", "lecuyer"))
##  save(hmodel, file = "gF2.RData")

load("gF2.RData")

library(rjags)
library(ggplot2)

#####################Check convergence
h1<-as.mcmc.list(hmodel)
gd<-gelman.diag(h1[,1:367],multivariate = F) ###Exclude imputed income bc many values at ceiling or floor
sum(gd[[1]][,1] > 1.1) 
####All model parameters have rhat < 1.1

#####################Convert to single chain
cds<-as.mcmc(hmodel)

inc<- apply(as.matrix(cds[,368:1203]),1,mean) ####Estimate average income for each draw
age<-mean(gatudata$age) ###average age

############################experimental analysis
####Function to estimate probability respondent supports Gagauz independence across MCMC draws 
#with different ethnic and linguistic characteristics, holding everything else constant at mean
#or mode
bpred<-function(cont,t1,t2,t3,eps,beta,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=36)
  pred<-matrix(nrow=nrow(cont),ncol=36)
  for(i in 1:nrow(cont)){
    ###Gagauz R monolingual
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,2] <-t1[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,3] <-t2[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,4] <-t3[i,]%*%rbind(1,0,0,0,0,0)
    ###Gagauz R/G
    mu[i,5] <-cont[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,6] <-t1[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,7] <-t2[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,8] <-t3[i,]%*%rbind(1,0,0,1,0,0) 
    ###Gagauz G monolingual
    mu[i,9] <-cont[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,10] <-t1[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,11] <-t2[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,12] <-t3[i,]%*%rbind(1,1,0,1,0,0) 
    ###Moldovan R monolingual
    mu[i,13] <-cont[i,]%*%rbind(1,0,0,0,1,0) 
    mu[i,14] <-t1[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,15] <-t2[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,16] <-t3[i,]%*%rbind(1,0,0,0,1,0)       
    ###Moldovan R/M
    mu[i,17] <-cont[i,]%*%rbind(1,0,1,0,1,0) 
    mu[i,18] <-t1[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,19] <-t2[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,20] <-t3[i,]%*%rbind(1,0,1,0,1,0)       
    ###Moldovan M monolingual
    mu[i,21] <-cont[i,]%*%rbind(1,1,1,0,1,0) 
    mu[i,22] <-t1[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,23] <-t2[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,24] <-t3[i,]%*%rbind(1,1,1,0,1,0)    
    ###Russian R monolinual
    mu[i,25] <-cont[i,]%*%rbind(1,0,0,0,1,1) 
    mu[i,26] <-t1[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,27] <-t2[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,28] <-t3[i,]%*%rbind(1,0,0,0,1,1)       
    ###Russian R/M
    mu[i,29] <-cont[i,]%*%rbind(1,0,1,0,1,1) 
    mu[i,30] <-t1[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,31] <-t2[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,32] <-t3[i,]%*%rbind(1,0,1,0,1,1)
    ###Russian R/G
    mu[i,33] <-cont[i,]%*%rbind(1,0,0,1,1,1) 
    mu[i,34] <-t1[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,35] <-t2[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,36] <-t3[i,]%*%rbind(1,0,0,1,1,1)
    for(j in 1:36){
      pred[i,j]<-  1- pnorm(eps[i]  - (mu[i,j] + beta[i,]%*%rbind(inc[i],age)))
    }
  }
  return(pred)
}

###Third threshold
eps<-as.matrix(cds[,44])
####Parameters by experimental conditions
cont<- as.matrix(cds[,seq(1,24,4)])
t1<- as.matrix(cds[,seq(2,24,4)])
t2<- as.matrix(cds[,seq(3,24,4)])
t3<- as.matrix(cds[,seq(4,24,4)])
####Treatment-invariant parameters
beta<- as.matrix(cds[,37:38])

###Apply function
mu<-bpred(cont,t1,t2,t3,eps,beta,inc,age)

##########Table H.2
###Russian prime on monolingual Russian-speaking Gagauz, also in txt
c(median(mu[,3]-mu[,1]), HPDinterval(as.mcmc(mu[,3]-mu[,1]),prob=.95))
###Gagauz prime on monolingual Russian-speaking Gagauz, also in txt
c(median(mu[,4]-mu[,1]), HPDinterval(as.mcmc(mu[,4]-mu[,1]),prob=.95))

###Russian prime on bilingual Gagauz
c(median(mu[,7]-mu[,5]), HPDinterval(as.mcmc(mu[,7]-mu[,5]),prob=.95))
###Gagauz prime on bilingual Gagauz
c(median(mu[,8]-mu[,5]), HPDinterval(as.mcmc(mu[,8]-mu[,5]),prob=.95))

###Gagauz prime on monolingual Gagauz-speaker, also in txt
c(median(mu[,12]-mu[,9]), HPDinterval(as.mcmc(mu[,12]-mu[,9]),prob=.95))

##########Median probability Gagauz speaker in T_Gagauz / bilingual in control / Russian-speaker in
###control supports separatism (in txt) 
median(mu[,12])
median(mu[,5])
median(mu[,1])

#####T_Gagauz effect on Moldovan bilingual
median(mu[,20])
median(mu[,17])
mean(mu[,17]>mu[,20])


#####T_Moldovan effect on Moldovan Russian monolingual
median(mu[,14])
median(mu[,13])
mean(mu[,14]>mu[,13])

###Estimate posterior median and hpd
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.95)

#####Create dataset 
Condition<-rep(c("Control", "T_Moldovan", "T_Russian", "T_Gagauz"),9)
Ethnicity<-c(rep("Gagauz",12),rep("Moldovan",12),rep("Russian",12))
Language<-c(rep("Russian monolingual",4),rep("Russian/Gagauz bilingual",4),
            rep("Gagauz monolingual",4), rep("Russian monolingual",4),
            rep("Russian/Moldovan bilingual",4), rep("Moldovan monolingual",4),
            rep("Russian monolingual",4),rep("Russian/Moldovan bilingual",4),
            rep("Russian/Gagauz bilingual",4))
mu<-data.frame(mum,muhpd,Condition, Ethnicity,Language)
names(mu)[1]<-"value"

####Control condition graphic
muc<-mu[mu$Condition=="Control",]
muc$levels<-interaction(muc$Ethnicity,muc$Language)
muc$levels<-droplevels(muc$levels)
muc$levels = factor(muc$levels,levels(muc$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(muc$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                      "Russian monolingual", " Russian/Gagauz bilingual",        
                      " Russian monolingual", "Russian/Moldovan bilingual",      
                      "  Russian monolingual", " Russian/Moldovan bilingual",  
                      "Moldovan monolingual")

#############Figure J.5
muc$Ethnicity<-as.character(muc$Ethnicity)
ggplot(data = muc, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position=c(.75,.25))  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))



#####Subgroup analyses
mu$levels<-interaction(mu$Ethnicity,mu$Language,mu$Condition)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,sort(levels(mu$levels)))
mu$levels = factor(mu$levels,levels(mu$levels)[c(1:4,9:12,5:8,17:24,13:16,29:32,25:28,33:36)])
levels(mu$levels)<-c("Control", "T_Gagauz", "T_Moldovan", "T_Russian", 
                     " Control", " T_Gagauz", " T_Moldovan", " T_Russian",
                     "  Control", "  T_Gagauz", "  T_Moldovan", "  T_Russian",
                     "   Control", "   T_Gagauz", "   T_Moldovan", "   T_Russian",
                     "    Control", "    T_Gagauz", "    T_Moldovan", "    T_Russian",
                     "     Control", "     T_Gagauz", "     T_Moldovan", "     T_Russian",
                     "      Control", "      T_Gagauz", "      T_Moldovan", "      T_Russian",
                     "       Control", "       T_Gagauz", "       T_Moldovan", "       T_Russian",
                     "        Control", "        T_Gagauz", "        T_Moldovan", "        T_Russian")
mu$Ethnicity<-as.character(mu$Ethnicity)
mu$Language<- factor(mu$Language,levels(as.factor(mu$Language))[c(1,4,3,5,2)])

########Figure 5
ggplot(data = mu[which(mu$Ethnicity=="Gagauz"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(1.5,4.5,5.5,8.5,9.5)) + 
  geom_segment(aes(y=mu[9,1], yend=mu[9,1], x=.5, xend=4.5), color="black") +
  geom_segment(aes(y=mu[5,1], yend=mu[5,1], x=4.5, xend=8.5), color="darkgrey") +
  geom_segment(aes(y=mu[1,1], yend=mu[1,1], x=8.5, xend=12.5), color="lightgrey") +
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

########################Figure J.6
ggplot(data = mu[which(mu$Ethnicity=="Moldovan"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

ggplot(data = mu[which(mu$Ethnicity=="Russian"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))


###################################
############Table J.4##############
#Manually edit to correctly format#
###################################

library(stargazer)
pm<-apply(cds,2,median)
hpd<-HPDinterval(cds)

##########Experimental coefficients
pes<-cbind(pm[seq(1,36,4)], 1, hpd[seq(1,36,4),1], 2, hpd[seq(1,36,4),2], 3,
           pm[seq(2,36,4)], 1, hpd[seq(2,36,4),1], 2, hpd[seq(2,36,4),2], 3,
           pm[seq(3,36,4)], 1, hpd[seq(3,36,4),1], 2, hpd[seq(3,36,4),2], 3,
           pm[seq(4,36,4)], 1, hpd[seq(4,36,4),1], 2, hpd[seq(4,36,4),2], 4)
rownames(pes)<-c("Intercept", "Not fluent in Russian", "Fluent in Moldovan", 
                 "Fluent in Gagauz", "Not Gagauz", "Russian", "Bulgarian",
                 "Ukrainian", "Other")
stargazer(pes, digits=2)

################Condition-invariant coefficients
pes<-cbind(pm[37:44], 1, hpd[37:44,1], 2, hpd[37:44,2], 4)
rownames(pes)<-c("ln(Income)", "ln(Age)", "Male", 
                 "Higher education", "Urban","gamma_1","gamma_2","gamma_3")
stargazer(pes, digits=2)

###############################################################################
##################################Observational analyses#######################
###############################################################################

####Function to estimate probability respondent supports given separatist outcomes across MCMC 
#draws with different ethnic and linguistic characteristics, holding everything else constant at 
#mean or mode
bpred<-function(cont,eps,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=9)
  pred<-matrix(nrow=nrow(cont),ncol=9)
  for(i in 1:nrow(cont)){
    ###monolingual R Gagauz
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0,inc[i],age)
    ###bilingual R/G Gagauz
    mu[i,2] <-cont[i,]%*%rbind(1,0,0,1,0,0,inc[i],age)
    ###monolingual G Gagauz
    mu[i,3] <-cont[i,]%*%rbind(1,1,0,1,0,0,inc[i],age)
    ###monolingual R Moldovan
    mu[i,4] <-cont[i,]%*%rbind(1,0,0,0,1,0,inc[i],age)
    ###bilingual R/M Moldovan
    mu[i,5] <-cont[i,]%*%rbind(1,0,1,0,1,0,inc[i],age)
    ###monolingual M Moldovan 
    mu[i,6] <-cont[i,]%*%rbind(1,1,1,0,1,0,inc[i],age)
    ###monolingual R Russian
    mu[i,7] <-cont[i,]%*%rbind(1,0,0,0,1,1,inc[i],age)
    ###bilingual R/M Russian
    mu[i,8] <-cont[i,]%*%rbind(1,0,1,0,1,1,inc[i],age)
    ###bilingual R/G Russian
    mu[i,9] <-cont[i,]%*%rbind(1,0,0,1,1,1,inc[i],age)
    for(j in 1:9){
      pred[i,j]<-  1- pnorm(eps[i]  - mu[i,j])
    }
  }
  return(pred)
}

####Third threshold (neither/nor vs support)
eps<-as.matrix(cds[,154])

####################Integration w Moldova, Figure I.4
contMol<- as.matrix(cds[,c(seq(54,95,7),seq(117,130,7))])
####################Autonomy in Moldova, Figure I.4
contAut<- as.matrix(cds[,c(seq(55,95,7),seq(118,130,7))])
####################Confederation w Moldova, Figure I.4
contConf<- as.matrix(cds[,c(seq(56,95,7),seq(119,130,7))])
##################Independence, Figure 3
contInd<- as.matrix(cds[,c(seq(57,95,7),seq(120,130,7))])
#################Integration w Russia, Figure I.5
contRus<- as.matrix(cds[,c(seq(58,95,7),seq(121,130,7))])
######################Autonomy in Russia, Figure I.5
contRA<- as.matrix(cds[,c(seq(59,95,7),seq(122,130,7))])
#################Confederation w Russia, Figure I.5
contRC<- as.matrix(cds[,c(seq(60,95,7),seq(123,130,7))])

##########################Figure 3, Table H.2, txt stats####################################
#To create figures/ estimate stats for different outcomes, replace contInd w relevant matrix
############################################################################################
mu<-bpred(contInd,eps,inc,age)


##################Table H.2, txt
#############Difference btw bilingual Gagauz and monolingual R Gagauz
c(median(mu[,2]-mu[,1]), HPDinterval(as.mcmc(mu[,2]-mu[,1]),prob=.95))
#############Difference btw monolingual G Gagauz and monolingual R Gagauz, in txt
c(median(mu[,3]-mu[,1]), HPDinterval(as.mcmc(mu[,3]-mu[,1]),prob=.95))
#############Difference btw monolingual R Gagauz and Moldovan
c(median(mu[,4]-mu[,1]), HPDinterval(as.mcmc(mu[,4]-mu[,1]),prob=.95))
#############Difference btw bilingual Moldovan and monolingual R Moldovan, in txt
c(median(mu[,5]-mu[,4]), HPDinterval(as.mcmc(mu[,5]-mu[,4]),prob=.95))
#############Difference btw monolingual M Moldovan and monolingual R Moldovan, in txt
c(median(mu[,6]-mu[,4]), HPDinterval(as.mcmc(mu[,6]-mu[,4]),prob=.95))
#############Difference btw monolingual R Gagauz and Russian
c(median(mu[,7]-mu[,1]), HPDinterval(as.mcmc(mu[,7]-mu[,1]),prob=.95))
#############Difference btw bilingual R/G Russian and monolingual R Russian
c(median(mu[,8]-mu[,7]), HPDinterval(as.mcmc(mu[,8]-mu[,7]),prob=.95))
#############Difference btw bilingual R/M Russian and monolingual R Russian
c(median(mu[,9]-mu[,7]), HPDinterval(as.mcmc(mu[,9]-mu[,7]),prob=.95))


###################Figure 3
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.95)
Language<-as.factor(c("Russian monolingual","Russian/Gagauz bilingual","Gagauz monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Moldovan monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Russian/Gagauz bilingual"))
Ethnicity <- as.factor(c(rep("Gagauz", 3),rep("Moldovan", 3),rep("Russian", 3)))
mu<-data.frame(mum,muhpd,Language,Ethnicity)
names(mu)[1]<-"value"

mu$levels<-interaction(mu$Ethnicity,mu$Language)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,levels(mu$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(mu$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                     "Russian monolingual", " Russian/Gagauz bilingual",        
                     " Russian monolingual", "Russian/Moldovan bilingual",      
                     "  Russian monolingual", " Russian/Moldovan bilingual",  
                     "Moldovan monolingual")
mu$Ethnicity<-as.character(mu$Ethnicity)

ggplot(data = mu, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position="top")  + 
  coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

################################Table I.1#######################
###########Edit manually to create tables#######################
################################################################

################Integration w Moldova
pes<-cbind(pm[seq(54,151,7)], 1, hpd[seq(54,151,7),1], 2, hpd[seq(54,151,7),2], 3,
           pm[seq(55,151,7)], 1, hpd[seq(55,151,7),1], 2, hpd[seq(55,151,7),2], 3,
           pm[seq(56,151,7)], 1, hpd[seq(56,151,7),1], 2, hpd[seq(56,151,7),2], 3,
           pm[seq(57,151,7)], 1, hpd[seq(57,151,7),1], 2, hpd[seq(57,151,7),2], 4)
rownames(pes)<-c("Intercept", "Not fluent in Russian", "Fluent in Moldovan", 
                 "Fluent in Gagauz", "Not Gagauz", "Russian", "Bulgarian",
                 "Ukrainian", "Other","ln(Income)", "ln(Age)", "Male", 
                 "Higher education", "Urban")
stargazer(pes, digits=2)

################Integration w Russia
pes<-cbind(pm[seq(58,151,7)], 1, hpd[seq(58,151,7),1], 2, hpd[seq(58,151,7),2], 3,
           pm[seq(59,151,7)], 1, hpd[seq(59,151,7),1], 2, hpd[seq(59,151,7),2], 3,
           pm[seq(60,151,7)], 1, hpd[seq(60,151,7),1], 2, hpd[seq(60,151,7),2], 4)
rownames(pes)<-c("Intercept", "Not fluent in Russian", "Fluent in Moldovan", 
                 "Fluent in Gagauz", "Not Gagauz", "Russian", "Bulgarian",
                 "Ukrainian", "Other","ln(Income)", "ln(Age)", "Male", 
                 "Higher education", "Urban")
stargazer(pes, digits=2)

################Thresholds
pes<-cbind(pm[152:155], 1, hpd[152:155,1], 2, hpd[152:155,2], 4)
rownames(pes)<-c("gamma_1", "gamma_2", "gamma_3", "gamma_4")
stargazer(pes, digits=2)

##########################################################
################Item NA analyses, Appendix K##############
##########################################################

########################################Experimental
####Function to estimate probability respondent provides experimental NA across MCMC draws with 
#different ethnic and linguistic characteristics, holding everything else constant at mean or mode
bpred<-function(cont,t1,t2,t3,eps,beta,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=36)
  pred<-matrix(nrow=nrow(cont),ncol=36)
  for(i in 1:nrow(cont)){
    ###Gagauz R monolingual
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,2] <-t1[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,3] <-t2[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,4] <-t3[i,]%*%rbind(1,0,0,0,0,0)
    ###Gagauz R/G bilingual
    mu[i,5] <-cont[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,6] <-t1[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,7] <-t2[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,8] <-t3[i,]%*%rbind(1,0,0,1,0,0) 
    ###Gagauz G monolingual
    mu[i,9] <-cont[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,10] <-t1[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,11] <-t2[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,12] <-t3[i,]%*%rbind(1,1,0,1,0,0) 
    ###Moldovan R monolingual
    mu[i,13] <-cont[i,]%*%rbind(1,0,0,0,1,0) 
    mu[i,14] <-t1[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,15] <-t2[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,16] <-t3[i,]%*%rbind(1,0,0,0,1,0)       
    ###Moldovan R/M bilingual
    mu[i,17] <-cont[i,]%*%rbind(1,0,1,0,1,0) 
    mu[i,18] <-t1[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,19] <-t2[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,20] <-t3[i,]%*%rbind(1,0,1,0,1,0)       
    ###Moldovan M monolingual
    mu[i,21] <-cont[i,]%*%rbind(1,1,1,0,1,0) 
    mu[i,22] <-t1[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,23] <-t2[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,24] <-t3[i,]%*%rbind(1,1,1,0,1,0)    
    ###Russian R monolingual
    mu[i,25] <-cont[i,]%*%rbind(1,0,0,0,1,1) 
    mu[i,26] <-t1[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,27] <-t2[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,28] <-t3[i,]%*%rbind(1,0,0,0,1,1)       
    ###Russian R/M bilingual
    mu[i,29] <-cont[i,]%*%rbind(1,0,1,0,1,1) 
    mu[i,30] <-t1[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,31] <-t2[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,32] <-t3[i,]%*%rbind(1,0,1,0,1,1)
    ###Russian R/G bilingual
    mu[i,33] <-cont[i,]%*%rbind(1,0,0,1,1,1) 
    mu[i,34] <-t1[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,35] <-t2[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,36] <-t3[i,]%*%rbind(1,0,0,1,1,1)
    for(j in 1:36){
      pred[i,j]<-  pnorm(mu[i,j] + beta[i,]%*%rbind(inc[i],age))
    }
  }
  return(pred)
}

###############Coefficients for different conditions
cont<- as.matrix(cds[,seq(156,179,4)])
t1<- as.matrix(cds[,seq(157,179,4)])
t2<- as.matrix(cds[,seq(158,179,4)])
t3<- as.matrix(cds[,seq(159,179,4)])
###############Treatment-invariant coefficients
beta<- as.matrix(cds[,192:193])

############Estimate probabilities
mu<-bpred(cont,t1,t2,t3,eps,beta,inc,age)

####Create dataset
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.9)

Condition<-rep(c("Control", "T_Moldovan", "T_Russian", "T_Gagauz"),9)
Ethnicity<-c(rep("Gagauz",12),rep("Moldovan",12),rep("Russian",12))
Language<-factor(c(rep("Russian monolingual",4),rep("Russian/Gagauz bilingual",4),
            rep("Gagauz monolingual",4), rep("Russian monolingual",4),
            rep("Russian/Moldovan bilingual",4), rep("Moldovan monolingual",4),
            rep("Russian monolingual",4),rep("Russian/Moldovan bilingual",4),
            rep("Russian/Gagauz bilingual",4)))

mu<-data.frame(mum,muhpd,Condition, Ethnicity,Language)
names(mu)[1]<-"value"
mu$levels<-interaction(mu$Ethnicity,mu$Language,mu$Condition)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,sort(levels(mu$levels)))
mu$levels = factor(mu$levels,levels(mu$levels)[c(1:4,9:12,5:8,17:24,13:16,29:32,25:28,33:36)])

levels(mu$levels)<-c("Control", "T_Gagauz", "T_Moldovan", "T_Russian", 
                     " Control", " T_Gagauz", " T_Moldovan", " T_Russian",
                     "  Control", "  T_Gagauz", "  T_Moldovan", "  T_Russian",
                     "   Control", "   T_Gagauz", "   T_Moldovan", "   T_Russian",
                     "    Control", "    T_Gagauz", "    T_Moldovan", "    T_Russian",
                     "     Control", "     T_Gagauz", "     T_Moldovan", "     T_Russian",
                     "      Control", "      T_Gagauz", "      T_Moldovan", "      T_Russian",
                     "       Control", "       T_Gagauz", "       T_Moldovan", "       T_Russian",
                     "        Control", "        T_Gagauz", "        T_Moldovan", "        T_Russian")

mu$Ethnicity<-as.character(mu$Ethnicity)
mu$Language<- factor(mu$Language,levels(mu$Language)[c(1,4,3,5,2)])

######################
######Figure K.3######
######################

ggplot(data = mu[which(mu$Ethnicity=="Gagauz"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

ggplot(data = mu[which(mu$Ethnicity=="Moldovan"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

ggplot(data = mu[which(mu$Ethnicity=="Russian"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

#####################Non-experimental NA
####Function to estimate probability respondent provides NA for given outcome across MCMC draws 
#with different ethnic and linguistic characteristics, holding everything else constant at mean
#or mode
bpred<-function(cont,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=9)
  pred<-matrix(nrow=nrow(cont),ncol=9)
  for(i in 1:nrow(cont)){
    ###monolingual R Gagauz
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0,inc[i],age)
    ###bilingual R/G Gagauz
    mu[i,2] <-cont[i,]%*%rbind(1,0,0,1,0,0,inc[i],age)
    ###monolingual G Gagauz
    mu[i,3] <-cont[i,]%*%rbind(1,1,0,1,0,0,inc[i],age)
    ###monolingual R Moldovan
    mu[i,4] <-cont[i,]%*%rbind(1,0,0,0,1,0,inc[i],age)
    ###bilingual R/M Moldovan
    mu[i,5] <-cont[i,]%*%rbind(1,0,1,0,1,0,inc[i],age)
    ###monolingual M Moldovan 
    mu[i,6] <-cont[i,]%*%rbind(1,1,1,0,1,0,inc[i],age)
    ###monolingual R Rusian
    mu[i,7] <-cont[i,]%*%rbind(1,0,0,0,1,1,inc[i],age)
    ###bilingual R/M Russian
    mu[i,8] <-cont[i,]%*%rbind(1,0,1,0,1,1,inc[i],age)
    ###bilingual R/G Russian
    mu[i,9] <-cont[i,]%*%rbind(1,0,0,1,1,1,inc[i],age)
    for(j in 1:9){
      pred[i,j]<- pnorm(mu[i,j])
    }
  }
  return(pred)
}

########################Different outcome coefficient estimates
####################Integration w Moldova 
contMol<- as.matrix(cds[,c(seq(206,247,7),seq(269,282,7))])
####################Autonomy in Moldova 
contAut<- as.matrix(cds[,c(seq(207,247,7),seq(270,282,7))])
####################Confederation w Moldova 
contConf<- as.matrix(cds[,c(seq(208,247,7),seq(271,282,7))])
##################Independence 
contInd<- as.matrix(cds[,c(seq(209,247,7),seq(272,282,7))])
#################Integration w Russia 
contRus<- as.matrix(cds[,c(seq(210,247,7),seq(273,282,7))])
######################Autonomy in Russia 
contRA<- as.matrix(cds[,c(seq(211,247,7),seq(274,282,7))])
#################Confederation w Russia 
contRC<- as.matrix(cds[,c(seq(212,247,7),seq(275,282,7))])


#################Figure K.2
############To create graphic for relevant outcome, insert outcome matrix here
mu<-bpred(contInd,inc,age)
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.9)
Language<-as.factor(c("Russian monolingual","Russian/Gagauz bilingual","Gagauz monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Moldovan monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Russian/Gagauz bilingual"))
Ethnicity <- as.factor(c(rep("Gagauz", 3),rep("Moldovan", 3),rep("Russian", 3)))
mu<-data.frame(mum,muhpd,Language,Ethnicity)
names(mu)[1]<-"value"
mu$levels<-interaction(mu$Ethnicity,mu$Language)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,levels(mu$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(mu$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                     "Russian monolingual", " Russian/Gagauz bilingual",        
                     " Russian monolingual", "Russian/Moldovan bilingual",      
                     "  Russian monolingual", " Russian/Moldovan bilingual",  
                     "Moldovan monolingual")
mu$Ethnicity<-as.character(mu$Ethnicity)


ggplot(data = mu, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position=c(.75,.25))  + 
  coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))


##################################################
######################Language salience statistics
#############Table B.2############################
##################################################

##############Gagauz government support for language
###Russian
mean(gatudata$ggs_r>3, na.rm=T)
###Gagauz
mean(gatudata$ggs_g>3, na.rm=T)
###Moldovan
mean(gatudata$ggs_m>3, na.rm=T)

##############Moldovan government support for language
###Russian
mean(gatudata$mgs_r>3, na.rm=T)
###Gagauz
mean(gatudata$mgs_g>3, na.rm=T)
###Moldovan
mean(gatudata$mgs_m>3, na.rm=T)

####################Gov comparison in txt
mean(gatudata$ggs_r>gatudata$mgs_r,na.rm=T)
mean(gatudata$ggs_g>gatudata$mgs_g,na.rm=T)
mean(gatudata$ggs_m<gatudata$mgs_m,na.rm=T)

##############Frequency of language use in Gagauzia
###Russian
mean(gatudata$glf_r>3, na.rm=T)
###Gagauz
mean(gatudata$glf_g>3, na.rm=T)
###Moldovan
mean(gatudata$glf_m>3, na.rm=T)

##############Frequency of language use in Moldova
###Russian
mean(gatudata$mlf_r>3, na.rm=T)
###Gagauz
mean(gatudata$mlf_g>3, na.rm=T)
###Moldovan
mean(gatudata$mlf_m>3, na.rm=T)


####################Frequency of use comparison in txt
mean(gatudata$glf_r>gatudata$mlf_r,na.rm=T)
mean(gatudata$glf_g>gatudata$mlf_g,na.rm=T)
mean(gatudata$glf_m<gatudata$mlf_m,na.rm=T)

###Russian to Gagauz comparison in Gagauzia
mean(gatudata$glf_r>gatudata$glf_g,na.rm=T)
mean(gatudata$glf_r<gatudata$glf_g,na.rm=T)

mean(gatudata$ggs_r>gatudata$ggs_g,na.rm=T)
mean(gatudata$ggs_r<gatudata$ggs_g,na.rm=T)

#################################################################
##############################Components of ethnic identity stats
###########################Table C1##############################

###########Create data frame w components by ethnic group
###Note-NAs/DKs coded as 0
eth_id<-data.frame(Component=c("Mother's nationality", "Father's nationality", 
                               "Following cultural traditions", "Knowledge of language", 
                               "Religious belief", "Physical appearance", "Self-identification", 
                               "Location of residence", "Citizenship", "Common history",
                               "Professional traditions", "Marrying co-ethnic", 
                               "Educating children in language"),
                   Russian=c(mean(gatudata$N3a[gatudata$Russian==1]), 
                             mean(gatudata$N3b[gatudata$Russian==1]),
                             mean(gatudata$N3c[gatudata$Russian==1]),
                             mean(gatudata$N3d[gatudata$Russian==1]),
                             mean(gatudata$N3e[gatudata$Russian==1]),
                             mean(gatudata$N3f[gatudata$Russian==1]),
                             mean(gatudata$N3g[gatudata$Russian==1]),
                             mean(gatudata$N3h[gatudata$Russian==1]),
                             mean(gatudata$N3i[gatudata$Russian==1]),
                             mean(gatudata$N3j[gatudata$Russian==1]),
                             mean(gatudata$N3k[gatudata$Russian==1]),
                             mean(gatudata$N3l[gatudata$Russian==1]),
                             mean(gatudata$N3m[gatudata$Russian==1])),
                   Moldovan=c(mean(gatudata$N3a[gatudata$Moldovan==1]), 
                              mean(gatudata$N3b[gatudata$Moldovan==1]),
                              mean(gatudata$N3c[gatudata$Moldovan==1]),
                              mean(gatudata$N3d[gatudata$Moldovan==1]),
                              mean(gatudata$N3e[gatudata$Moldovan==1]),
                              mean(gatudata$N3f[gatudata$Moldovan==1]),
                              mean(gatudata$N3g[gatudata$Moldovan==1]),
                              mean(gatudata$N3h[gatudata$Moldovan==1]),
                              mean(gatudata$N3i[gatudata$Moldovan==1]),
                              mean(gatudata$N3j[gatudata$Moldovan==1]),
                              mean(gatudata$N3k[gatudata$Moldovan==1]),
                              mean(gatudata$N3l[gatudata$Moldovan==1]),
                              mean(gatudata$N3m[gatudata$Moldovan==1])),
                   Gagauz=c(mean(gatudata$N3a[gatudata$Gagauz==1]),
                            mean(gatudata$N3b[gatudata$Gagauz==1]),
                            mean(gatudata$N3c[gatudata$Gagauz==1]),
                            mean(gatudata$N3d[gatudata$Gagauz==1]),
                            mean(gatudata$N3e[gatudata$Gagauz==1]),
                            mean(gatudata$N3f[gatudata$Gagauz==1]),
                            mean(gatudata$N3g[gatudata$Gagauz==1]),
                            mean(gatudata$N3h[gatudata$Gagauz==1]),
                            mean(gatudata$N3i[gatudata$Gagauz==1]),
                            mean(gatudata$N3j[gatudata$Gagauz==1]),
                            mean(gatudata$N3k[gatudata$Gagauz==1]),
                            mean(gatudata$N3l[gatudata$Gagauz==1]),
                            mean(gatudata$N3m[gatudata$Gagauz==1])),
                   Overall=c(mean(gatudata$N3a), 
                             mean(gatudata$N3b),
                             mean(gatudata$N3c),
                             mean(gatudata$N3d),
                             mean(gatudata$N3e),
                             mean(gatudata$N3f),
                             mean(gatudata$N3g),
                             mean(gatudata$N3h),
                             mean(gatudata$N3i),
                             mean(gatudata$N3j),
                             mean(gatudata$N3k),
                             mean(gatudata$N3l),
                             mean(gatudata$N3m)))

####Table, ordered by overall order
eth_id[order(eth_id$Overall, decreasing=T),]

#######################################
#########Age of acquisition, Appendix D
#######################################

############Figure D.1

##########Russian
qplot(aa_r,1-rf,data=gatudata, xlab="Age of acquisition", ylab="Fluent in Russian") + 
  geom_jitter(height=.01) + stat_smooth(method="lm") + theme_bw() + 
  coord_cartesian(ylim=c(0,1),xlim=c(0,50))

###########Moldovan
qplot(aa_m,mf,data=gatudata, xlab="Age of acquisition", ylab="Fluent in Moldovan") + 
  geom_jitter(height=.01) + stat_smooth(method="lm") + theme_bw() + 
  coord_cartesian(ylim=c(0,1),xlim=c(0,50))

##########Gagauz
qplot(aa_g,gf,data=gatudata, xlab="Age of acquisition", ylab="Fluent in Gagauz") + 
  geom_jitter(height=.01) + stat_smooth(method="lm") + theme_bw() + 
  coord_cartesian(ylim=c(0,1),xlim=c(0,50))

################################################
#######Reasons for learning language, Appendix E
################################################

###############Russian
#####Reason for learning language x fluent
raa<-table(gatudata$reason_r,1-gatudata$rf)
raa[,2]/apply(raa,1,sum) ###Proportion fluent

####Family Reason for learning language x fluent
raa<-table(gatudata$freason_r,1-gatudata$rf)
raa[,2]/apply(raa,1,sum) ###Proportion fluent

###############Moldovan
#####Reason for learning language x fluent
maa<-table(gatudata$reason_m,gatudata$mf)
maa[,2]/apply(maa,1,sum) ###Proportion fluent

####Family Reason for learning language x fluent
maa<-table(gatudata$freason_m,gatudata$mf)
maa[,2]/apply(maa,1,sum) ###Proportion fluent


###############Gagauz
#####Reason for learning language x fluent
gaa<-table(gatudata$reason_g,gatudata$gf)
gaa[,2]/apply(gaa,1,sum) ###Proportion fluent

####Family Reason for learning language x fluent
gaa<-table(gatudata$freason_g,gatudata$gf)
gaa[,2]/apply(gaa,1,sum) ###Proportion fluent

############################################
##########Descriptive statistics, Appendix G
############################################

###Table G.1
#####Language
mean(gatudata$rf,na.rm=T)
sum(!is.na(gatudata$rf))

mean(gatudata$mf,na.rm=T)
sum(!is.na(gatudata$mf))

mean(gatudata$gf,na.rm=T)
sum(!is.na(gatudata$gf))

####Ethnicity (no NA)
mean(gatudata$Russian)
mean(gatudata$Moldovan)
mean(gatudata$Gagauz)
mean(gatudata$Ukrainian)
mean(gatudata$Bulgarian)
mean(gatudata$Other)

####Income
income<- apply(as.matrix(cds[,368:1203]),2,mean) ####Estimate average income across draws for respondents
mean(income)
sd(income)

###Age
mean(gatudata$age)
sd(gatudata$age)

###Higher education
mean(gatudata$highered,na.rm=T)
sum(!is.na(gatudata$highered))

##Male 
mean(gatudata$male)

##Urban
mean(gatudata$urban)

#########Table G.2

###Independence
median(gatudata$S2d,na.rm=T)
quantile(gatudata$S2d,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$S2d))
sum(gatudata$S2d_dk)/sum(is.na(gatudata$S2d))

###Integration with Moldova
median(gatudata$S2a,na.rm=T)
quantile(gatudata$S2a,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$S2a))
sum(gatudata$S2a_dk)/sum(is.na(gatudata$S2a))

median(gatudata$S2b,na.rm=T)
quantile(gatudata$S2b,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$S2b))
sum(gatudata$S2b_dk)/sum(is.na(gatudata$S2b))

median(gatudata$S2c,na.rm=T)
quantile(gatudata$S2c,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$S2c))
sum(gatudata$S2c_dk)/sum(is.na(gatudata$S2c))

####One Moldova response
836- sum(is.na(gatudata$S2a) & is.na(gatudata$S2b) & is.na(gatudata$S2c))

###Integration with Russia
median(gatudata$R1a,na.rm=T)
quantile(gatudata$R1a,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$R1a))
sum(gatudata$R1a_dk)/sum(is.na(gatudata$R1a))

################Autonomy w/in Russia
median(gatudata$R1b,na.rm=T)
quantile(gatudata$R1b,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$R1b))
sum(gatudata$R1b_dk)/sum(is.na(gatudata$R1b))

############Confederation w Russia
median(gatudata$R1c,na.rm=T)
quantile(gatudata$R1c,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$R1c))
sum(gatudata$R1c_dk)/sum(is.na(gatudata$R1c))

###One Russia response
836-sum(is.na(gatudata$R1a) & is.na(gatudata$R1b) & is.na(gatudata$R1c))

#####1 Likert scale complete
836-sum(is.na(gatudata$S2d) & is.na(gatudata$S2a) & is.na(gatudata$S2b) & is.na(gatudata$S2c) & is.na(gatudata$R1a) & is.na(gatudata$R1b) & is.na(gatudata$R1c))
#####1 outcome
836-sum(is.na(gatudata$DV1) & is.na(gatudata$S2d) & is.na(gatudata$S2a) & is.na(gatudata$S2b) & is.na(gatudata$S2c) & is.na(gatudata$R1a) & is.na(gatudata$R1b) & is.na(gatudata$R1c))


###Table G.3
###########Outcome
median(gatudata$DV1,na.rm=T)
quantile(gatudata$DV1,na.rm=T)[c(2,4)]
sum(!is.na(gatudata$DV1))
sum(gatudata$DV1_dk)/sum(is.na(gatudata$DV1))

###Treatments
table(gatudata$tg)/836

############################################
#########################T-tests, Appendix J
############################################

###########Table J.2
##########Overall
t.test(gatudata$DV1[which(gatudata$tg=="Control")] > 3, gatudata$DV1[which(gatudata$tg=="T1")] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control")] > 3, gatudata$DV1[which(gatudata$tg=="T2")] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control")] > 3, gatudata$DV1[which(gatudata$tg=="T3")] > 3)

####Fluent in Russian
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$rf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$rf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$rf==0)] > 3)

####Not fluent in Russian
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$rf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$rf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$rf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$rf==1)] > 3)

###Fluent in Moldovan
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$mf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$mf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$mf==1)] > 3)

###Not fluent in Moldovan
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$mf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$mf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$mf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$mf==0)] > 3)

###Fluent in Gagauz
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$gf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$gf==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$gf==1)] > 3)

###Not fluent in Gagauz
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$gf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$gf==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$gf==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$gf==0)] > 3)

####Russian
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Russian==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Russian==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Russian==1)] > 3)

###Not Russian
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Russian==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Russian==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Russian==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Russian==0)] > 3)

###Ethnic Moldovan
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Moldovan==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Moldovan==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Moldovan==1)] > 3)

#####Not Moldovan
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Moldovan==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Moldovan==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Moldovan==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Moldovan==0)] > 3)

####Ethnic Gagauz
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==1)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Gagauz==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==1)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Gagauz==1)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==1)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Gagauz==1)] > 3)

###Not Gagauz
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==0)] > 3, gatudata$DV1[which(gatudata$tg=="T1" & gatudata$Gagauz==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==0)] > 3, gatudata$DV1[which(gatudata$tg=="T2" & gatudata$Gagauz==0)] > 3)
t.test(gatudata$DV1[which(gatudata$tg=="Control" & gatudata$Gagauz==0)] > 3, gatudata$DV1[which(gatudata$tg=="T3" & gatudata$Gagauz==0)] > 3)

###########################################################
#####Analysis with random experimental clustering variation
###############################################Appendix J.6
###########################################################

######################Experimental threshold initial values
taustar<-polr(method="probit", as.factor(DV1) ~ rf + mf + gf + NGagauz + Russian + Ukrainian + Bulgarian + Other + income + 
                age + male + highered + urban, data=gatudata)$zeta

###########Data file
hmod<-list(N=nrow(gatudata), tg=gatudata$tg, 
           y1=gatudata$DV1, y2=as.matrix(gatudata[,26:32]), 
           y1NA=gatudata$DV1na, y2NA=as.matrix(gatudata[,33:39]), 
           g0=rep(0,4), G0=diag(1,4), 
           t0=rep(0,3), T0=diag(.01,3), 
           rf=gatudata$rf, mf=gatudata$mf, gf=gatudata$gf, NGagauz=gatudata$NGagauz, 
           Other=gatudata$Other, Ukrainian=gatudata$Ukrainian, 
           Russian=gatudata$Russian, Bulgarian=gatudata$Bulgarian, 
           Male=gatudata$male, 
           Age=gatudata$age, Urban=gatudata$urban,
           NIOTA=14, I0=diag(1,14), i0=rep(0,14), 
           NALPHA=9, A0=diag(1,9), a0=rep(0,9), 
           B0=diag(1,5), b0=rep(0,5), 
           HigherEd=gatudata$highered, Married=gatudata$married, Locality=gatudata$locality, 
           Prof=gatudata$prof, Resp=gatudata$resp, House=gatudata$house, ymin=gatudata$ymin, 
           ymax=gatudata$ymax, 
           bI0=rep(0,5), BI0=diag(1,5), ae1=rep(1,14))

#hmodel <- run.jags(method="parallel",model="gF2_exp.txt", inits=list(taustar=taustar), 
#                   monitor=c("alpha", "beta", "tau", "mu.a",
#                             "tauExp", 
#                             "pr", "pm",  "pg",
#                             "bI", "betaI","gammaI","alphaI",
#                             "pIe", "pIm","e1", "rIl","rIh",
#                             "Income"), data=hmod, n.chains=4, adapt=5000, burnin=50000, 
#                   sample=1000, thin=500, summarise=F,plots=F, modules=c("glm", "lecuyer"))

#save(hmodel, file = "gF1_exp.RData")
load("gF1_exp.RData")

###Check convergence
h1<-as.mcmc.list(hmodel)
gd<-gelman.diag(h1[,1:126],multivariate = F)
sum(gd[[1]][,1]>1.1)

####Convert to single chain
cds<-as.mcmc(hmodel)

###Mean income and age
inc<- apply(as.matrix(cds[,127:962]),1,mean)
age<-mean(gatudata$age)

############################experimental analysis
####Function to estimate probability respondent supports Gagauz independence across MCMC draws 
#with different ethnic and linguistic characteristics, holding everything else constant at mean
#or mode
bpred<-function(cont,t1,t2,t3,eps,beta,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=36)
  pred<-matrix(nrow=nrow(cont),ncol=36)
  for(i in 1:nrow(cont)){
    ###Gagauz R monolingual
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,2] <-t1[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,3] <-t2[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,4] <-t3[i,]%*%rbind(1,0,0,0,0,0)
    ###Gagauz R/G bilingual
    mu[i,5] <-cont[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,6] <-t1[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,7] <-t2[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,8] <-t3[i,]%*%rbind(1,0,0,1,0,0) 
    ###Gagauz G monolingual
    mu[i,9] <-cont[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,10] <-t1[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,11] <-t2[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,12] <-t3[i,]%*%rbind(1,1,0,1,0,0) 
    ###Moldovan R monolingual
    mu[i,13] <-cont[i,]%*%rbind(1,0,0,0,1,0) 
    mu[i,14] <-t1[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,15] <-t2[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,16] <-t3[i,]%*%rbind(1,0,0,0,1,0)       
    ###Moldovan R/M bilingual
    mu[i,17] <-cont[i,]%*%rbind(1,0,1,0,1,0) 
    mu[i,18] <-t1[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,19] <-t2[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,20] <-t3[i,]%*%rbind(1,0,1,0,1,0)       
    ###Moldovan M monolingual
    mu[i,21] <-cont[i,]%*%rbind(1,1,1,0,1,0) 
    mu[i,22] <-t1[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,23] <-t2[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,24] <-t3[i,]%*%rbind(1,1,1,0,1,0)    
    ###Russian R monolingual
    mu[i,25] <-cont[i,]%*%rbind(1,0,0,0,1,1) 
    mu[i,26] <-t1[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,27] <-t2[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,28] <-t3[i,]%*%rbind(1,0,0,0,1,1)       
    ###Russian R/M bilingual
    mu[i,29] <-cont[i,]%*%rbind(1,0,1,0,1,1) 
    mu[i,30] <-t1[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,31] <-t2[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,32] <-t3[i,]%*%rbind(1,0,1,0,1,1)
    ###Russian R/G bilingual
    mu[i,33] <-cont[i,]%*%rbind(1,0,0,1,1,1) 
    mu[i,34] <-t1[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,35] <-t2[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,36] <-t3[i,]%*%rbind(1,0,0,1,1,1)
    for(j in 1:36){
      pred[i,j]<-  1- pnorm(eps[i]  - (mu[i,j] + beta[i,]%*%rbind(inc[i],age)))
    }
  }
  return(pred)
}

####Third threshold
eps<-as.matrix(cds[,44])

###Experimental coefficients
cont<- as.matrix(cds[,seq(1,24,4)])
t1<- as.matrix(cds[,seq(2,24,4)])
t2<- as.matrix(cds[,seq(3,24,4)])
t3<- as.matrix(cds[,seq(4,24,4)])

###Treatment-invariant coefficients
beta<- as.matrix(cds[,37:38])

###Estimate posterior probabilities
mu<-bpred(cont,t1,t2,t3,eps,beta,inc,age)

###Median and HPD interval
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.95)

########Create dataset
Condition<-rep(c("Control", "T_Moldovan", "T_Russian", "T_Gagauz"),9)
Ethnicity<-c(rep("Gagauz",12),rep("Moldovan",12),rep("Russian",12))
Language<-factor(c(rep("Russian monolingual",4),rep("Russian/Gagauz bilingual",4),
            rep("Gagauz monolingual",4), rep("Russian monolingual",4),
            rep("Russian/Moldovan bilingual",4), rep("Moldovan monolingual",4),
            rep("Russian monolingual",4),rep("Russian/Moldovan bilingual",4),
            rep("Russian/Gagauz bilingual",4)))

mu<-data.frame(mum,muhpd,Condition, Ethnicity,Language)
names(mu)[1]<-"value"

#####Subgroup
mu$levels<-interaction(mu$Ethnicity,mu$Language,mu$Condition)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,sort(levels(mu$levels)))
mu$levels = factor(mu$levels,levels(mu$levels)[c(1:4,9:12,5:8,17:24,13:16,29:32,25:28,33:36)])

levels(mu$levels)<-c("Control", "T_Gagauz", "T_Moldovan", "T_Russian", 
                     " Control", " T_Gagauz", " T_Moldovan", " T_Russian",
                     "  Control", "  T_Gagauz", "  T_Moldovan", "  T_Russian",
                     "   Control", "   T_Gagauz", "   T_Moldovan", "   T_Russian",
                     "    Control", "    T_Gagauz", "    T_Moldovan", "    T_Russian",
                     "     Control", "     T_Gagauz", "     T_Moldovan", "     T_Russian",
                     "      Control", "      T_Gagauz", "      T_Moldovan", "      T_Russian",
                     "       Control", "       T_Gagauz", "       T_Moldovan", "       T_Russian",
                     "        Control", "        T_Gagauz", "        T_Moldovan", "        T_Russian")

mu$Ethnicity<-as.character(mu$Ethnicity)
mu$Language<- factor(mu$Language,levels(mu$Language)[c(1,4,3,5,2)])

####################Figure J.10
ggplot(data = mu[which(mu$Ethnicity=="Gagauz"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

###########Figure J.11
ggplot(data = mu[which(mu$Ethnicity=="Moldovan"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

###########Figure J.12
ggplot(data = mu[which(mu$Ethnicity=="Russian"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

############################################
###############Russian influence, Appendix L
############################################

###Proportion supporting integration with Russia w/o autonomy
mean(gatudata$R1a>3,na.rm=T)

###Proportion supporting autonomy w/in Moldova
mean(gatudata$S2b>3,na.rm=T)


####Figure L.1
qplot(log(Russia+1),1-rf, xlab="Time in Russia", ylab="Fluent in Russian", data=gatudata) + 
  stat_smooth(method="lm") + theme_bw() +  coord_cartesian(ylim=c(-.1,1.1)) + scale_color_grey() + 
  theme(text = element_text(size=15), legend.position  = "top") + geom_jitter(height=.05,width=.05)

####Figure L.2
qplot(log(Russia+1),S2d, xlab="Time in Russia", ylab="Support for independence", data=gatudata) + 
  stat_smooth(method="lm") + scale_color_grey() + theme_bw() + 
  theme(text = element_text(size=15), legend.position = "top") +  geom_jitter(height=.1,width=.05) + 
  coord_cartesian(ylim=c(1,5)) 

qplot(log(Russia+1),R1a, xlab="Time in Russia", ylab="Support for integration with Russia", data=gatudata) + 
  stat_smooth(method="lm") + scale_color_grey() + theme_bw() + 
  theme(text = element_text(size=15), legend.position = "top") +  geom_jitter(height=.1,width=.05) + 
  coord_cartesian(ylim=c(1,5)) 

##########################################
##Media, Appendix M#######################
##########################################

##############Table M.1
table(1-gatudata$rf, 1-gatudata$rf_m)/sum(!is.na(gatudata$rf) & !is.na(gatudata$rf_m) )
table(gatudata$mf, gatudata$mf_m)/sum(!is.na(gatudata$mf) & !is.na(gatudata$mf_m) )
table(gatudata$gf, gatudata$gf_m)/sum(!is.na(gatudata$gf) & !is.na(gatudata$gf_m) )

#####Figure M.4
qplot(as.factor(1-rf), S2a, color=as.factor(mf_m), fill=as.factor(mf_m), data=gatudata[which(!is.na(gatudata$rf) & !is.na(gatudata$mf_m)),],
      xlab="Fluent in Russian", ylab="Supports integration with Moldova", geom="boxplot") + labs(color="Watches TV in Moldovan", fill="Watches TV in Moldovan") + 
  theme_bw() + scale_color_grey() + scale_fill_grey() + theme(text = element_text(size=15), legend.position = "top")

qplot(as.factor(1-rf), S2a, color=as.factor(gf_m), fill=as.factor(gf_m), data=gatudata[which(!is.na(gatudata$rf) & !is.na(gatudata$gf_m)),],
      xlab="Fluent in Russian", ylab="Supports integration with Moldova", geom="boxplot") + labs(color="Watches TV in Gagauz", fill="Watches TV in Gagauz") + 
  theme_bw() + scale_color_grey() + scale_fill_grey() + theme(text = element_text(size=15), legend.position = "top")

qplot(as.factor(1-rf), S2b, color=as.factor(1-rf_m), fill=as.factor(1-rf_m), data=gatudata[which(!is.na(gatudata$rf) & !is.na(gatudata$rf_m)),],
      xlab="Fluent in Russian", ylab="Supports autonomy in Moldova", geom="boxplot") + labs(color="Watches TV in Russian", fill="Watches TV in Russian") + 
  theme_bw() + scale_color_grey() + scale_fill_grey() + theme(text = element_text(size=15), legend.position = "top")

qplot(as.factor(1-rf), S2d, color=as.factor(mf_m), fill=as.factor(mf_m), data=gatudata[which(!is.na(gatudata$rf) & !is.na(gatudata$mf_m)),],
      xlab="Fluent in Russian", ylab="Supports independence", geom="boxplot") + labs(color="Watches TV in Moldovan", fill="Watches TV in Moldovan") + 
  theme_bw() + scale_color_grey() + scale_fill_grey() + theme(text = element_text(size=15), legend.position = "top")

qplot(as.factor(1-rf), R1a, color=as.factor(1-rf_m), fill=as.factor(1-rf_m), data=gatudata[which(!is.na(gatudata$rf) & !is.na(gatudata$rf_m)),],
      xlab="Fluent in Russian", ylab="Supports integration with Russia", geom="boxplot") + labs(color="Watches TV in Russian", fill="Watches TV in Russian") + 
  theme_bw() + scale_color_grey() + scale_fill_grey() + theme(text = element_text(size=15), legend.position = "top")

################################################################
##########Regression analyses of media on support for separatism
################################################################

##############Experimental threshold initial values
taustar<-polr(method="probit", as.factor(DV1) ~ rf_m + mf_m + gf_m+ NGagauz + Russian + Ukrainian + Bulgarian + Other + income + 
                age + male + highered + urban, data=gatudata)$zeta

##############Observational threshold initial values
gammastar<-matrix(nrow=7,ncol=4)
for(i in 1:7){
  gammastar[i,]<-polr(method="probit", as.factor(gatudata[,i+25]) ~ gatudata$rf_m + gatudata$mf_m + 
                        gatudata$gf_m + gatudata$NGagauz + gatudata$Russian + gatudata$Ukrainian + 
                        gatudata$Bulgarian + gatudata$Other + gatudata$income + gatudata$age + 
                        gatudata$male + gatudata$highered + gatudata$urban)$zeta
}

gammastar<-apply(gammastar,2,mean)

###########Data file
hmod<-list(N=nrow(gatudata), tg=gatudata$tg, 
           y1=gatudata$DV1, y2=as.matrix(gatudata[,26:32]), 
           g0=rep(0,4), G0=diag(.01,4), 
           t0=rep(0,3), T0=diag(.01,3), 
           rf=gatudata$rf_m, mf=gatudata$mf_m, gf=gatudata$gf_m, NGagauz=gatudata$NGagauz, 
           Other=gatudata$Other, Ukrainian=gatudata$Ukrainian, 
           Russian=gatudata$Russian, Bulgarian=gatudata$Bulgarian, 
           Male=gatudata$male, 
           Age=gatudata$age, Urban=gatudata$urban,
           NIOTA=14, I0=diag(1,14), i0=rep(0,14), 
           NALPHA=9, A0=diag(1,9), a0=rep(0,9), 
           B0=diag(1,5), b0=rep(0,5), 
           HigherEd=gatudata$highered, Married=gatudata$married, Locality=gatudata$locality, 
           Prof=gatudata$prof, Resp=gatudata$resp, House=gatudata$house, ymin=gatudata$ymin, 
           ymax=gatudata$ymax, 
           bI0=rep(0,5), BI0=diag(1,5), ae1=rep(1,14))


##hmodel <- run.jags(method="parallel",model="gFr.txt", inits=list(gammastar=gammastar,
##                                                                 taustar=taustar), 
##                   monitor=c("alpha", "beta", "tau", "mu.a",
##                             "iota", "gamma",
##                             "pr", "pm",  "pg",
##                             "bI", "betaI","gammaI","alphaI",
##                             "pIe", "pIm","e1", "rIl","rIh",
##                             "Income"), data=hmod, n.chains=4, adapt=5000, burnin=50000, 
##                   sample=1000, thin=500, summarise=F,plots=F, modules=c("glm", "lecuyer"))

##save(hmodel, file = "gF1r.RData")
load("gF1r.RData")

#####Check convergence
cds<-as.mcmc.list(hmodel)
gd<-gelman.diag(cds[,1:219], multivariate = F)
sum(gd[[1]][,1]>1.1)

#####Convert to single chain
cds<-as.mcmc(hmodel)

####Mean income and age across posterior draws
inc<- apply(as.matrix(cds[,220:1055]),1,mean)
age<-mean(gatudata$age)

############################experimental analysis
##############Not reported in text
####Function to estimate probability respondent supports Gagauz independence across MCMC draws 
#with different ethnic and linguistic characteristics, holding everything else constant at mean
#or mode
bpred<-function(cont,t1,t2,t3,eps,beta,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=36)
  pred<-matrix(nrow=nrow(cont),ncol=36)
  for(i in 1:nrow(cont)){
    ###Gagauz R monolingual
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,2] <-t1[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,3] <-t2[i,]%*%rbind(1,0,0,0,0,0)
    mu[i,4] <-t3[i,]%*%rbind(1,0,0,0,0,0)
    ###Gagauz R/G bilingual
    mu[i,5] <-cont[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,6] <-t1[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,7] <-t2[i,]%*%rbind(1,0,0,1,0,0) 
    mu[i,8] <-t3[i,]%*%rbind(1,0,0,1,0,0) 
    ###Gagauz G monolingual
    mu[i,9] <-cont[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,10] <-t1[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,11] <-t2[i,]%*%rbind(1,1,0,1,0,0) 
    mu[i,12] <-t3[i,]%*%rbind(1,1,0,1,0,0) 
    ###Moldovan R monolingual
    mu[i,13] <-cont[i,]%*%rbind(1,0,0,0,1,0) 
    mu[i,14] <-t1[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,15] <-t2[i,]%*%rbind(1,0,0,0,1,0)   
    mu[i,16] <-t3[i,]%*%rbind(1,0,0,0,1,0)       
    ###Moldovan R/M bilingual
    mu[i,17] <-cont[i,]%*%rbind(1,0,1,0,1,0) 
    mu[i,18] <-t1[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,19] <-t2[i,]%*%rbind(1,0,1,0,1,0)   
    mu[i,20] <-t3[i,]%*%rbind(1,0,1,0,1,0)       
    ###Moldovan M monolingual
    mu[i,21] <-cont[i,]%*%rbind(1,1,1,0,1,0) 
    mu[i,22] <-t1[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,23] <-t2[i,]%*%rbind(1,1,1,0,1,0)  
    mu[i,24] <-t3[i,]%*%rbind(1,1,1,0,1,0)    
    ###Russian R monolingual
    mu[i,25] <-cont[i,]%*%rbind(1,0,0,0,1,1) 
    mu[i,26] <-t1[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,27] <-t2[i,]%*%rbind(1,0,0,0,1,1)   
    mu[i,28] <-t3[i,]%*%rbind(1,0,0,0,1,1)       
    ###Russian R/M bilingual
    mu[i,29] <-cont[i,]%*%rbind(1,0,1,0,1,1) 
    mu[i,30] <-t1[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,31] <-t2[i,]%*%rbind(1,0,1,0,1,1)   
    mu[i,32] <-t3[i,]%*%rbind(1,0,1,0,1,1)
    ###Russian R/G bilingual
    mu[i,33] <-cont[i,]%*%rbind(1,0,0,1,1,1) 
    mu[i,34] <-t1[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,35] <-t2[i,]%*%rbind(1,0,0,1,1,1)   
    mu[i,36] <-t3[i,]%*%rbind(1,0,0,1,1,1)
    for(j in 1:36){
      pred[i,j]<-  1- pnorm(eps[i]  - (mu[i,j] + beta[i,]%*%rbind(inc[i],age)))
    }
  }
  return(pred)
}

######Third threshold
eps<-as.matrix(cds[,44])

##############Experimental coefficients
cont<- as.matrix(cds[,seq(1,24,4)])
t1<- as.matrix(cds[,seq(2,24,4)])
t2<- as.matrix(cds[,seq(3,24,4)])
t3<- as.matrix(cds[,seq(4,24,4)])
###Treatment invariant coefficients
beta<- as.matrix(cds[,37:38])

#####Estimate posterior predictions
mu<-bpred(cont,t1,t2,t3,eps,beta,inc,age)

###Create dataset
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.9)

Condition<-rep(c("Control", "T_Moldovan", "T_Russian", "T_Gagauz"),9)
Ethnicity<-c(rep("Gagauz",12),rep("Moldovan",12),rep("Russian",12))
Language<-factor(c(rep("Russian monolingual",4),rep("Russian/Gagauz bilingual",4),
            rep("Gagauz monolingual",4), rep("Russian monolingual",4),
            rep("Russian/Moldovan bilingual",4), rep("Moldovan monolingual",4),
            rep("Russian monolingual",4),rep("Russian/Moldovan bilingual",4),
            rep("Russian/Gagauz bilingual",4)))

mu<-data.frame(mum,muhpd,Condition, Ethnicity,Language)
names(mu)[1]<-"value"

####Control
muc<-mu[mu$Condition=="Control",]
muc$levels<-interaction(muc$Ethnicity,muc$Language)
muc$levels<-droplevels(muc$levels)
muc$levels = factor(muc$levels,levels(muc$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(muc$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                      "Russian monolingual", " Russian/Gagauz bilingual",        
                      " Russian monolingual", "Russian/Moldovan bilingual",      
                      "  Russian monolingual", " Russian/Moldovan bilingual",  
                      "Moldovan monolingual")

muc$Ethnicity<-as.character(muc$Ethnicity)

ggplot(data = muc, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position=c(.75,.25))  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

#####Subgroup
mu$levels<-interaction(mu$Ethnicity,mu$Language,mu$Condition)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,sort(levels(mu$levels)))
mu$levels = factor(mu$levels,levels(mu$levels)[c(1:4,9:12,5:8,17:24,13:16,29:32,25:28,33:36)])

levels(mu$levels)<-c("Control", "T_Gagauz", "T_Moldovan", "T_Russian", 
                     " Control", " T_Gagauz", " T_Moldovan", " T_Russian",
                     "  Control", "  T_Gagauz", "  T_Moldovan", "  T_Russian",
                     "   Control", "   T_Gagauz", "   T_Moldovan", "   T_Russian",
                     "    Control", "    T_Gagauz", "    T_Moldovan", "    T_Russian",
                     "     Control", "     T_Gagauz", "     T_Moldovan", "     T_Russian",
                     "      Control", "      T_Gagauz", "      T_Moldovan", "      T_Russian",
                     "       Control", "       T_Gagauz", "       T_Moldovan", "       T_Russian",
                     "        Control", "        T_Gagauz", "        T_Moldovan", "        T_Russian")

mu$Ethnicity<-as.character(mu$Ethnicity)
mu$Language<- factor(mu$Language,levels(mu$Language)[c(1,4,3,5,2)])


ggplot(data = mu[which(mu$Ethnicity=="Gagauz"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

ggplot(data = mu[which(mu$Ethnicity=="Moldovan"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

ggplot(data = mu[which(mu$Ethnicity=="Russian"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

#######################Non-experimental analysis
#Function to estimate posterior probability respondent w different ethnic and linguistic
#characteristics supports a given separatist outcome
bpred<-function(cont,eps,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=9)
  pred<-matrix(nrow=nrow(cont),ncol=9)
  for(i in 1:nrow(cont)){
    ###monolingual R Gag
    mu[i,1] <-cont[i,]%*%rbind(1,0,0,0,0,0,inc[i],age)
    ###bilingual R/G Gag
    mu[i,2] <-cont[i,]%*%rbind(1,0,0,1,0,0,inc[i],age)
    ###monolingual G Gag
    mu[i,3] <-cont[i,]%*%rbind(1,1,0,1,0,0,inc[i],age)
    ###monolingual R Mol
    mu[i,4] <-cont[i,]%*%rbind(1,0,0,0,1,0,inc[i],age)
    ###bilingual R/M Mol
    mu[i,5] <-cont[i,]%*%rbind(1,0,1,0,1,0,inc[i],age)
    ###monolingual M Moldovans 
    mu[i,6] <-cont[i,]%*%rbind(1,1,1,0,1,0,inc[i],age)
    ###monolingual R Rus
    mu[i,7] <-cont[i,]%*%rbind(1,0,0,0,1,1,inc[i],age)
    ###bilingual R/M Rus
    mu[i,8] <-cont[i,]%*%rbind(1,0,1,0,1,1,inc[i],age)
    ###bilingual R/G Russians
    mu[i,9] <-cont[i,]%*%rbind(1,0,0,1,1,1,inc[i],age)
    for(j in 1:9){
      pred[i,j]<-  1- pnorm(eps[i]  - mu[i,j])
    }
  }
  return(pred)
}

####Third threshold
eps<-as.matrix(cds[,154])

####Outcome coefficients
#####Supports integration with Moldova
contMol<- as.matrix(cds[,c(seq(54,95,7),seq(117,130,7))])
######Supports autonomy in Moldova
contAut<- as.matrix(cds[,c(seq(55,95,7),seq(118,130,7))])
####Supports confederation with Moldova
contConf<- as.matrix(cds[,c(seq(56,95,7),seq(119,130,7))])
########Supports independence
contInd<- as.matrix(cds[,c(seq(57,95,7),seq(120,130,7))])
##########Supports integration with Russia
contRus<- as.matrix(cds[,c(seq(58,95,7),seq(121,130,7))])
#######Supports autonomy within Russia
contRA<- as.matrix(cds[,c(seq(59,95,7),seq(122,130,7))])
########Supports confederation with Russia
contRC<- as.matrix(cds[,c(seq(60,95,7),seq(123,130,7))])

####Estimate for independence support; to estimate other outcomes use relevant matrix
mu<-bpred(contInd,eps,inc,age)

####Estimate posterior median and HPD interval
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.9)

#########Dataset
Language<-as.factor(c("Russian monolingual","Russian/Gagauz bilingual","Gagauz monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Moldovan monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Russian/Gagauz bilingual"))
Ethnicity <- as.factor(c(rep("Gagauz", 3),rep("Moldovan", 3),rep("Russian", 3)))
mu<-data.frame(mum,muhpd,Language,Ethnicity)
names(mu)[1]<-"value"

mu$levels<-interaction(mu$Ethnicity,mu$Language)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,levels(mu$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(mu$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                     "Russian monolingual", " Russian/Gagauz bilingual",        
                     " Russian monolingual", "Russian/Moldovan bilingual",      
                     "  Russian monolingual", " Russian/Moldovan bilingual",  
                     "Moldovan monolingual")

mu$Ethnicity<-as.character(mu$Ethnicity)

##Figure M.3
ggplot(data = mu, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position="top")  + 
  coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

###################################################
###############Latent variable analysis, Appendix N
###################################################

###Data file
hmod<-list(N=nrow(gatudata), NDV=7, NDV2=2,t0=rep(0,4),T0=diag(.1,4), NL=3,
           y=array(c(as.matrix(gatudata[,c("sr1","sr2","sr3","ur1","ur2","ur3","ur4")]),
                     as.matrix(gatudata[,c("sm1","sm2","sm3","um1","um2","um3","um4")]),
                     as.matrix(gatudata[,c("sg1","sg2","sg3","ug1","ug2","ug3","ug4")])),
                   dim=c(nrow(gatudata),7,3)),
           y2=array(c(as.matrix(gatudata[,c("rsf","ruf")]),as.matrix(gatudata[,c("msf","muf")]),
                      as.matrix(gatudata[,c("gsf","guf")])),dim=c(nrow(gatudata),2,3)))

####Initial threshold values
taustar=rbind(c(qnorm(mean(gatudata$rsf<2,na.rm=T)),qnorm(mean(gatudata$rsf<3,na.rm=T)),qnorm(mean(gatudata$rsf<4,na.rm=T)),qnorm(mean(gatudata$rsf<5,na.rm=T))),
              c(qnorm(mean(gatudata$ruf<2,na.rm=T)),qnorm(mean(gatudata$ruf<3,na.rm=T)),qnorm(mean(gatudata$ruf<4,na.rm=T)),qnorm(mean(gatudata$ruf<5,na.rm=T))))

##hmodel <- run.jags(method="parallel",model="lv_sp.txt", monitor=c("alpha","beta","tau","beta2","xi"), 
##                   inits=list(taustar=taustar), data=hmod, n.chains=4, burnin=10000, 
##                   sample=1250, thin=80, summarise=F,plots=F, modules=c("glm","lecuyer"))

##save(hmodel, file = "gf_sp.RData")
load("gf_sp.RData")

cds<-as.mcmc.list(hmodel)

###Check convergence
gd<-gelman.diag(cds[,1:24])
sum(gd[[1]][,1]>1.1)

####Convert to single chain
cds<-as.mcmc(hmodel)

##################Difficulty graphic
difficulty<-cds[,c(1:7,15:22)] 
difficulty[,8:15]<- -difficulty[,8:15] ###Inverse order for thresholds

####Median and HPD interval
dm<-apply(difficulty,2,median)
dhpd<-HPDinterval(as.mcmc(difficulty))

###Data frame
d.ob<-data.frame(Question=c("Introductions","Talk about day","Discuss complex issues","Understand directions",
                            "Understand basic discussions","Understand quick speech","Understand hard TV", 
                            "Speaking T1", "Comprehension T1", "Speaking T2", "Comprehension T2", 
                            "Speaking T3", "Comprehension T3", "Speaking T4", "Comprehension T4"),dm,dhpd,Type=c(rep("Can do",7),rep(c("Speaking","Comprehension"),4)))
d.ob<-d.ob[order(d.ob[,2]),]
d.ob$Question<-factor(d.ob$Question,levels=d.ob[,1])

####Figure N.1.B
ggplot(data = d.ob, aes(Question, dm, color=Type))  + 
  geom_point(size = 2, position=position_dodge(width=0.5)) +
  geom_linerange(aes(ymin = lower, ymax = upper), size=1,show.legend = F,alpha=I(.25)) + 
  theme_bw() + labs(y="", x="") + scale_color_viridis_d() + 
  theme(legend.position="top", text = element_text(size=20))  + coord_flip() + ggtitle("Posterior estimates of difficulty parameters")

##################Discrimination graphic
disc<-cds[,c(8:14,23:24)] 

####Median and HPDinterval
dm<-apply(disc,2,median)
dhpd<-HPDinterval(as.mcmc(disc))

###Data frame
d.ob<-data.frame(Question=c("Introductions","Talk about day","Discuss complex issues","Understand directions",
                            "Understand basic discussions","Understand quick speech","Understand hard TV","Spoken","Comprehension"),dm,dhpd)
d.ob<-d.ob[order(d.ob[,2]),]
d.ob$Question<-factor(d.ob$Question,levels=d.ob[,1])

####Figure N.1.B
ggplot(data = d.ob, aes(Question, dm))  + 
  geom_point(size = 2, position=position_dodge(width=0.5)) +
  geom_linerange(aes(ymin = lower, ymax = upper), size=1,show.legend = F,alpha=I(.25)) + 
  theme_bw() + labs(y="", x="") +
  theme(legend.position="top", text = element_text(size=20))  + coord_flip(ylim=c(0,7)) + ggtitle("Posterior estimates of discrimination \nparameters")

#######################################
###############Latent variable graphics
#######################################

######Russian estimates
xi<-cds[,25:860] 

####Median and HPD
xim<-apply(xi,2,median)
xihpd<-HPDinterval(as.mcmc(xi))

####Spoken proficiency
Speaking<-as.factor(gatudata$rsf)

###Data frame
pl.ob<-data.frame(id=as.factor(seq(1:836)),xim,xihpd,Speaking)
pl.ob<-pl.ob[order(pl.ob[,2]),]
pl.ob$id<-factor(pl.ob$id,levels=pl.ob[,1])

####Figure N.2.B
ggplot(data = pl.ob, aes(id, xim,color=Speaking))  + 
  geom_point(size = 2, position=position_dodge(width=0.5)) +
  geom_linerange(aes(ymin = lower, ymax = upper), size=1,show.legend = F,alpha=I(.25)) + 
  theme_bw() + labs(y="", x="") + scale_fill_viridis_d() + scale_color_viridis_d() +
  theme(legend.position="top", text = element_text(size=20), axis.text.y = element_blank(),axis.ticks.y=element_blank())  + coord_flip(ylim=c(-2.5,2.5)) + ggtitle("Posterior estimates of Russian speaking proficiency")

###############Gagauz estimates
xiU<-cds[,1697:2532] 

###Median and HPD interval
xiUm<-apply(xiU,2,median)
xiUhpd<-HPDinterval(as.mcmc(xiU))

###############Spoken proficiency
Speaking<-as.factor(gatudata$gsf)

#####Data frame
pl.ob<-data.frame(id=as.factor(seq(1:836)),xiUm,xiUhpd,Speaking)
pl.ob<-pl.ob[order(pl.ob[,2]),]
pl.ob$id<-factor(pl.ob$id,levels=pl.ob[,1])

####Figure N.2.B
ggplot(data = pl.ob, aes(id, xiUm,color=Speaking))  + 
  geom_point(size = 2, position=position_dodge(width=0.5),alpha=I(.25)) +
  geom_linerange(aes(ymin = lower, ymax = upper), size=1,show.legend = F,alpha=I(.25)) + 
  theme_bw() + labs(y="", x="") + scale_fill_viridis_d() + scale_color_viridis_d() +
  theme(legend.position="top", text = element_text(size=20), axis.text.y = element_blank(),axis.ticks.y=element_blank())  + coord_flip(ylim=c(-2.5,2.5)) + ggtitle("Posterior estimates of Gagauz speaking proficiency")


###############Moldovan estimates
xiM<-cds[,861:1696] 

####Median and HPD interval
xiMm<-apply(xiM,2,median)
xiMhpd<-HPDinterval(as.mcmc(xiM))

###############Spoken proficiency
Speaking<-as.factor(gatudata$msf)

###Data frame
pl.ob<-data.frame(id=as.factor(seq(1:836)),xiMm,xiMhpd,Speaking)
pl.ob<-pl.ob[order(pl.ob[,2]),]
pl.ob$id<-factor(pl.ob$id,levels=pl.ob[,1])

####Figure N.2.B
ggplot(data = pl.ob, aes(id, xiMm,color=Speaking))  + 
  geom_point(size = 2, position=position_dodge(width=0.5),alpha=I(.25)) +
  geom_linerange(aes(ymin = lower, ymax = upper), size=1,show.legend = F,alpha=I(.25)) + 
  theme_bw() + labs(y="", x="") + scale_fill_viridis_d() + scale_color_viridis_d() +
  theme(legend.position="top", text = element_text(size=20), axis.text.y = element_blank(),axis.ticks.y=element_blank())  + coord_flip(ylim=c(-2.5,2.5)) + ggtitle("Posterior estimates of Moldovan speaking proficiency")


##############Add estimates to dataset
##gatudata$xiR<-xim
##gatudata$xiM<-xiMm
##gatudata$xiG<-xiUm
##save(gatudata, file="gatudata.RData")

#########################################################################################
#############Regression analyses of support for separatism on latent linguistic abilities
#########################################################################################

#####Initial experimental threshold values
taustar<-polr(method="probit", as.factor(DV1) ~ xiR + xiM + xiG + NGagauz + Russian + Ukrainian + Bulgarian + Other + income + 
                age + male + highered + urban, data=gatudata)$zeta

####Initial observational threshold values
gammastar<-matrix(nrow=7,ncol=4)
for(i in 1:7){
  gammastar[i,]<-polr(method="probit", as.factor(gatudata[,i+25]) ~ gatudata$xiR + gatudata$xiM + 
                        gatudata$xiG + gatudata$NGagauz + gatudata$Russian + gatudata$Ukrainian + 
                        gatudata$Bulgarian + gatudata$Other + gatudata$income + gatudata$age + 
                        gatudata$male + gatudata$highered + gatudata$urban)$zeta
}
gammastar<-apply(gammastar,2,mean)

###########Data file
hmod<-list(N=nrow(gatudata), tg=gatudata$tg, 
           y1=gatudata$DV1, y2=as.matrix(gatudata[,26:32]), 
           g0=rep(0,4), G0=diag(.01,4), 
           t0=rep(0,3), T0=diag(.01,3), 
           rf=gatudata$xiR, mf=gatudata$xiM, gf=gatudata$xiG, NGagauz=gatudata$NGagauz, 
           Other=gatudata$Other, Ukrainian=gatudata$Ukrainian, 
           Russian=gatudata$Russian, Bulgarian=gatudata$Bulgarian, 
           Male=gatudata$male, 
           Age=gatudata$age, Urban=gatudata$urban,
           NIOTA=14, I0=diag(1,14), i0=rep(0,14), 
           NALPHA=9, A0=diag(1,9), a0=rep(0,9), 
           B0=diag(1,5), b0=rep(0,5), 
           HigherEd=gatudata$highered, Married=gatudata$married, Locality=gatudata$locality, 
           Prof=gatudata$prof, Resp=gatudata$resp, House=gatudata$house, ymin=gatudata$ymin, 
           ymax=gatudata$ymax, 
           bI0=rep(0,5), BI0=diag(1,5), ae1=rep(1,14))

##hmodel <- run.jags(method="parallel",model="gFc.txt", inits=list(gammastar=gammastar,
##                                                                 taustar=taustar), 
##                   monitor=c("alpha", "beta", "tau", "mu.a",
##                             "iota", "gamma",
##                             "pr", "pm",  "pg",
##                             "bI", "betaI","gammaI","alphaI",
##                             "pIe", "pIm","e1", "rIl","rIh",
##                             "Income"), data=hmod, n.chains=4, adapt=5000, burnin=50000, 
##                   sample=1000, thin=500, summarise=F,plots=F, modules=c("glm", "lecuyer"))
##save(hmodel, file = "gF1xi.RData")
load("gF1xi.RData")

########Analyze convergence
cds<-as.mcmc.list(hmodel)
gd<-gelman.diag(cds[,1:216], multivariate = F)
sum(gd[[1]][,1]>1.1)

####Convert to single chain
cds<-as.mcmc(hmodel)

###Average income and age across posterior draws
inc<- apply(as.matrix(cds[,217:1052]),1,mean)
age<-mean(gatudata$age)

############################experimental analysis
#############Function to estimate posterior probability respondent w different linguistic 
##and ethnic characteristics supports separatism across experimental conditions
#####Note that language is estimated at high and low modal values (.75, -1.5)
bpred<-function(cont,t1,t2,t3,eps,beta,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=36)
  pred<-matrix(nrow=nrow(cont),ncol=36)
  for(i in 1:nrow(cont)){
    ###Gagauz R monolingual
    mu[i,1] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,0,0)
    mu[i,2] <-t1[i,]%*%rbind(1,.75,-1.5,-1.5,0,0)
    mu[i,3] <-t2[i,]%*%rbind(1,.75,-1.5,-1.5,0,0)
    mu[i,4] <-t3[i,]%*%rbind(1,.75,-1.5,-1.5,0,0)
    ###Gagauz R/G bilingual
    mu[i,5] <-cont[i,]%*%rbind(1,.75,-1.5,.75,0,0) 
    mu[i,6] <-t1[i,]%*%rbind(1,.75,-1.5,.75,0,0) 
    mu[i,7] <-t2[i,]%*%rbind(1,.75,-1.5,.75,0,0) 
    mu[i,8] <-t3[i,]%*%rbind(1,.75,-1.5,.75,0,0) 
    ###Gagauz G monolingual
    mu[i,9] <-cont[i,]%*%rbind(1,-1.5,-1.5,.75,0,0) 
    mu[i,10] <-t1[i,]%*%rbind(1,-1.5,-1.5,.75,0,0) 
    mu[i,11] <-t2[i,]%*%rbind(1,-1.5,-1.5,.75,0,0) 
    mu[i,12] <-t3[i,]%*%rbind(1,-1.5,-1.5,.75,0,0) 
    ###Moldovan R monolingual
    mu[i,13] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,1,0) 
    mu[i,14] <-t1[i,]%*%rbind(1,.75,-1.5,-1.5,1,0)   
    mu[i,15] <-t2[i,]%*%rbind(1,.75,-1.5,-1.5,1,0)   
    mu[i,16] <-t3[i,]%*%rbind(1,.75,-1.5,-1.5,1,0)       
    ###Moldovan R/M bilingual
    mu[i,17] <-cont[i,]%*%rbind(1,.75,.75,-1.5,1,0) 
    mu[i,18] <-t1[i,]%*%rbind(1,.75,.75,-1.5,1,0)   
    mu[i,19] <-t2[i,]%*%rbind(1,.75,.75,-1.5,1,0)   
    mu[i,20] <-t3[i,]%*%rbind(1,.75,.75,-1.5,1,0)       
    ###Moldovan M monolingual
    mu[i,21] <-cont[i,]%*%rbind(1,-1.5,.75,-1.5,1,0) 
    mu[i,22] <-t1[i,]%*%rbind(1,-1.5,.75,-1.5,1,0)  
    mu[i,23] <-t2[i,]%*%rbind(1,-1.5,.75,-1.5,1,0)  
    mu[i,24] <-t3[i,]%*%rbind(1,-1.5,.75,-1.5,1,0)    
    ###Russian R monolingual
    mu[i,25] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,1,1) 
    mu[i,26] <-t1[i,]%*%rbind(1,.75,-1.5,-1.5,1,1)   
    mu[i,27] <-t2[i,]%*%rbind(1,.75,-1.5,-1.5,1,1)   
    mu[i,28] <-t3[i,]%*%rbind(1,.75,-1.5,-1.5,1,1)       
    ###Russian R/M bilingual
    mu[i,29] <-cont[i,]%*%rbind(1,.75,.75,-1.5,1,1) 
    mu[i,30] <-t1[i,]%*%rbind(1,.75,.75,-1.5,1,1)   
    mu[i,31] <-t2[i,]%*%rbind(1,.75,.75,-1.5,1,1)   
    mu[i,32] <-t3[i,]%*%rbind(1,.75,.75,-1.5,1,1)
    ###Russian R/G bilingual
    mu[i,33] <-cont[i,]%*%rbind(1,.75,-1.5,.75,1,1) 
    mu[i,34] <-t1[i,]%*%rbind(1,.75,-1.5,.75,1,1)   
    mu[i,35] <-t2[i,]%*%rbind(1,.75,-1.5,.75,1,1)   
    mu[i,36] <-t3[i,]%*%rbind(1,.75,-1.5,.75,1,1)
    for(j in 1:36){
      pred[i,j]<-  1- pnorm(eps[i]  - (mu[i,j] + beta[i,]%*%rbind(inc[i],age)))
    }
  }
  return(pred)
}

####Third threshold
eps<-as.matrix(cds[,44])

#####Experimental coefficients
cont<- as.matrix(cds[,seq(1,24,4)])
t1<- as.matrix(cds[,seq(2,24,4)])
t2<- as.matrix(cds[,seq(3,24,4)])
t3<- as.matrix(cds[,seq(4,24,4)])
###Treatment invariant coefficients
beta<- as.matrix(cds[,37:38])

#####Estimate posterior probability of support for separatism
mu<-bpred(cont,t1,t2,t3,eps,beta,inc,age)

#####Estimate median and HPD interval
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.95)

#####Data frame
Condition<-rep(c("Control", "T_Moldovan", "T_Russian", "T_Gagauz"),9)
Ethnicity<-c(rep("Gagauz",12),rep("Moldovan",12),rep("Russian",12))
Language<-factor(c(rep("Russian monolingual",4),rep("Russian/Gagauz bilingual",4),
            rep("Gagauz monolingual",4), rep("Russian monolingual",4),
            rep("Russian/Moldovan bilingual",4), rep("Moldovan monolingual",4),
            rep("Russian monolingual",4),rep("Russian/Moldovan bilingual",4),
            rep("Russian/Gagauz bilingual",4)))


mu<-data.frame(mum,muhpd,Condition, Ethnicity,Language)
names(mu)[1]<-"value"

####Control
muc<-mu[mu$Condition=="Control",]
muc$levels<-interaction(muc$Ethnicity,muc$Language)
muc$levels<-droplevels(muc$levels)
muc$levels = factor(muc$levels,levels(muc$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(muc$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                      "Russian monolingual", " Russian/Gagauz bilingual",        
                      " Russian monolingual", "Russian/Moldovan bilingual",      
                      "  Russian monolingual", " Russian/Moldovan bilingual",  
                      "Moldovan monolingual")
muc$Ethnicity<-as.character(muc$Ethnicity)

####Figure N.6.A
ggplot(data = muc, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position=c(.75,.25))  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))



#####Subgroup
mu$levels<-interaction(mu$Ethnicity,mu$Language,mu$Condition)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,sort(levels(mu$levels)))
mu$levels = factor(mu$levels,levels(mu$levels)[c(1:4,9:12,5:8,17:24,13:16,29:32,25:28,33:36)])

levels(mu$levels)<-c("Control", "T_Gagauz", "T_Moldovan", "T_Russian", 
                     " Control", " T_Gagauz", " T_Moldovan", " T_Russian",
                     "  Control", "  T_Gagauz", "  T_Moldovan", "  T_Russian",
                     "   Control", "   T_Gagauz", "   T_Moldovan", "   T_Russian",
                     "    Control", "    T_Gagauz", "    T_Moldovan", "    T_Russian",
                     "     Control", "     T_Gagauz", "     T_Moldovan", "     T_Russian",
                     "      Control", "      T_Gagauz", "      T_Moldovan", "      T_Russian",
                     "       Control", "       T_Gagauz", "       T_Moldovan", "       T_Russian",
                     "        Control", "        T_Gagauz", "        T_Moldovan", "        T_Russian")

mu$Ethnicity<-as.character(mu$Ethnicity)
mu$Language<- factor(mu$Language,levels(mu$Language)[c(1,4,3,5,2)])

####Figure N.6.B
ggplot(data = mu[which(mu$Ethnicity=="Gagauz"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

###Not reported
ggplot(data = mu[which(mu$Ethnicity=="Moldovan"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

##Not reported
ggplot(data = mu[which(mu$Ethnicity=="Russian"),], 
       aes(levels,value,shape=Language, color=Language))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(4.5,8.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="",linetype="") + guides(colour = guide_legend(nrow = 2)) + 
  theme(text = element_text(size=15), legend.position="top")  + coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))

#######################Observational analyses
#Function to estimate posterior probability respondent w different ethnic and linguistic
#characteristics supports a given separatist outcome
bpred<-function(cont,eps,inc,age){
  mu<-matrix(nrow=nrow(cont),ncol=9)
  pred<-matrix(nrow=nrow(cont),ncol=9)
  for(i in 1:nrow(cont)){
    ###monolingual R Gagauz
    mu[i,1] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,0,0,inc[i],age)
    ###bilingual R/G Gagauz
    mu[i,2] <-cont[i,]%*%rbind(1,.75,-1.5,.75,0,0,inc[i],age)
    ###monolingual G Gagauz
    mu[i,3] <-cont[i,]%*%rbind(1,-1.5,-1.5,.75,0,0,inc[i],age)
    ###monolingual R Moldovan
    mu[i,4] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,1,0,inc[i],age)
    ###bilingual R/M Moldovan
    mu[i,5] <-cont[i,]%*%rbind(1,.75,.75,-1.5,1,0,inc[i],age)
    ###monolingual M Moldovan
    mu[i,6] <-cont[i,]%*%rbind(1,-1.5,.75,-1.5,1,0,inc[i],age)
    ###monolingual R Russian
    mu[i,7] <-cont[i,]%*%rbind(1,.75,-1.5,-1.5,1,1,inc[i],age)
    ###bilingual R/M Russian
    mu[i,8] <-cont[i,]%*%rbind(1,.75,.75,-1.5,1,1,inc[i],age)
    ###bilingual R/G Russian
    mu[i,9] <-cont[i,]%*%rbind(1,.75,-1.5,.75,1,1,inc[i],age)
    for(j in 1:9){
      pred[i,j]<-  1- pnorm(eps[i]  - mu[i,j])
    }
  }
  return(pred)
}

####Third threshold
eps<-as.matrix(cds[,154])
###Support integration w Moldova
contMol<- as.matrix(cds[,c(seq(54,95,7),seq(117,130,7))])
###Support autonomy in Moldova
contAut<- as.matrix(cds[,c(seq(55,95,7),seq(118,130,7))])
###Support confederation w Moldova
contConf<- as.matrix(cds[,c(seq(56,95,7),seq(119,130,7))])
###Support independence
contInd<- as.matrix(cds[,c(seq(57,95,7),seq(120,130,7))])
###Support integration w Russia
contRus<- as.matrix(cds[,c(seq(58,95,7),seq(121,130,7))])
###Support autonomy in Russia
contRA<- as.matrix(cds[,c(seq(59,95,7),seq(122,130,7))])
###Support confederation w Russia
contRC<- as.matrix(cds[,c(seq(60,95,7),seq(123,130,7))])

######Estimate probability of support for independence. Replace with relevant matrix to estimate
#other outcomes
mu<-bpred(contInd,eps,inc,age)

####Estimate median and HPD interval
mum<-apply(mu,2,median)
muhpd<-HPDinterval(as.mcmc(mu),prob=.95)

####Data frame
Language<-as.factor(c("Russian monolingual","Russian/Gagauz bilingual","Gagauz monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Moldovan monolingual",
                      "Russian monolingual", "Russian/Moldovan bilingual", "Russian/Gagauz bilingual"))
Ethnicity <- as.factor(c(rep("Gagauz", 3),rep("Moldovan", 3),rep("Russian", 3)))

mu<-data.frame(mum,muhpd,Language,Ethnicity)
names(mu)[1]<-"value"

mu$levels<-interaction(mu$Ethnicity,mu$Language)
mu$levels<-droplevels(mu$levels)
mu$levels = factor(mu$levels,levels(mu$levels)[c(1,6,3,7,5,9,4,8,2)])
levels(mu$levels)<-c("Gagauz monolingual", "Russian/Gagauz bilingual", 
                     "Russian monolingual", " Russian/Gagauz bilingual",        
                     " Russian monolingual", "Russian/Moldovan bilingual",      
                     "  Russian monolingual", " Russian/Moldovan bilingual",  
                     "Moldovan monolingual")
mu$Ethnicity<-as.character(mu$Ethnicity)

######Figure N.5
ggplot(data = mu, aes(levels,value,color=Ethnicity, shape=Ethnicity))  + 
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), size=1,width = .01, 
                position=position_dodge(width=0.5)) + 
  geom_vline(xintercept = c(3.5,6.5)) + 
  theme_bw() + labs(y="", x="", color="",shape="") + 
  theme(text = element_text(size=15), legend.position="")  + 
  coord_flip(ylim=c(0,1)) + scale_color_grey() +
  scale_shape_manual(values=c(15,16,17))